package com.fslq.pipeline;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fslq.pojo.SchoolInfo;
import com.fslq.pojo.SchoolSpecial;
import com.fslq.service.SchoolService;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.fslq.task.AppProcessor.MAPPER;

/**
 * Pipeline that collects the first-level discipline specials of every school.
 *
 * <p>Each invocation of {@link #process(ResultItems, Task)} ignores its arguments, asks
 * {@link SchoolService} for all school ids, loads the {@code pc_special.json} document of each
 * school through a headless Chrome session, extracts the discipline names and finally hands the
 * collected {@link SchoolSpecial} records back to the service for persistence.
 */
@Component
public class SchoolSpecialDataPipeline implements Pipeline
{
    /** Matches one run of Chinese characters; compiled once because it is reused for every school. */
    private static final Pattern CHINESE_RUN = Pattern.compile("([\u4e00-\u9fa5]+)");

    /**
     * Timeout handed to {@link WebDriverWait} while waiting for the page body.
     * NOTE(review): interpreted as seconds by the {@code (WebDriver, long)} constructor - confirm
     * against the Selenium version in use.
     */
    private static final int WAIT_TIMEOUT = 50;

    @Autowired
    private SchoolService schoolService;

    /**
     * Crawls the specials of all schools and saves whatever was collected.
     *
     * <p>A school whose JSON cannot be parsed is skipped. Any other failure stops the crawl, but
     * the records gathered so far are still saved and the browser is always shut down.
     *
     * @param resultItems unused
     * @param task        unused
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        WebDriver driver = new ChromeDriver(new ChromeOptions().addArguments("--headless"));
        List<SchoolSpecial> specials = new ArrayList<>();
        try {
            for (Integer schoolId : schoolService.findAllSchoolId()) {
                try {
                    System.out.println(String.valueOf(schoolId));
                    specials.add(findInfo(driver, schoolId));
                } catch (JsonProcessingException e) {
                    // Malformed JSON for one school must not stop the remaining schools.
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
            // Keep the stack trace as well; the message alone is often null or uninformative.
            e.printStackTrace();
        } finally {
            // quit() ends the whole driver session (browser and driver process), unlike close(),
            // which only closes the current window.
            try {
                driver.quit();
            } catch (RuntimeException e) {
                // A failing shutdown must not prevent the collected data from being saved.
                e.printStackTrace();
            }
            schoolService.saveAllSpecials(specials);
        }
    }

    /**
     * Loads {@code https://static-data.eol.cn/www/2.0/school/<id>/pc_special.json} and extracts the
     * discipline names stored under {@code data.special_detail."4"}.
     *
     * <p>Shape of the relevant part of the response, as observed for school 102:
     * <pre>
     * data.special_detail:
     *   "1": [ ... ]
     *   "2": ""
     *   "3": ""
     *   "4": [ {id: "180", school_id: "102", class: "..."},
     *          {id: "181", school_id: "102", class: "..."}, ... ]
     * </pre>
     * A category without entries may therefore be an empty string instead of an empty array; both
     * cases, as well as a missing node, yield a record without specials.
     *
     * @param driver browser session used to fetch the document
     * @param s_id   id of the school to look up
     * @return a record carrying the school id and, if any names were found, the comma-separated
     *         discipline names
     * @throws JsonProcessingException if the page body is not valid JSON
     */
    private SchoolSpecial findInfo(WebDriver driver, Integer s_id) throws JsonProcessingException {
        WebDriverWait wait = new WebDriverWait(driver, WAIT_TIMEOUT);
        String url = "https://static-data.eol.cn/www/2.0/school/" + s_id + "/pc_special.json";
        driver.get(url);
        // The browser renders the raw JSON as the text of the body element.
        String body = wait.until(ExpectedConditions.presenceOfElementLocated(By.xpath("/html/body")))
                .getText();
        JsonNode root = MAPPER.readTree(body);

        SchoolSpecial special = new SchoolSpecial();
        special.setSchoolId(s_id);
        if (root == null) {
            return special;
        }

        // path() never returns null, so absent or non-object intermediate nodes are handled safely.
        JsonNode detail = root.path("data").path("special_detail").path("4");
        if (!detail.isArray() || detail.size() == 0) {
            return special;
        }

        String names = joinSpecialNames(detail.toString());
        if (!names.isEmpty()) {
            special.setSpecials(names);
        }
        return special;
    }

    /**
     * Extracts every run of Chinese characters from the serialized array, drops the placeholder
     * "自定" and joins the rest with commas. Later matches are placed first, so the result is in
     * reverse document order.
     */
    private static String joinSpecialNames(String arrayJson) {
        List<String> names = new ArrayList<>();
        Matcher matcher = CHINESE_RUN.matcher(arrayJson);
        while (matcher.find()) {
            String name = matcher.group(0);
            if (!"自定".equals(name)) {
                names.add(0, name);
            }
        }
        return String.join(",", names);
    }
}
